edf24d
@@ -27,6 +27,7 @@
 import java.util.Map;
 import java.util.Stack;
 
+import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.exec.ExtractOperator;
 import org.apache.hadoop.hive.ql.exec.GroupByOperator;
 import org.apache.hadoop.hive.ql.exec.JoinOperator;
@@ -230,8 +231,13 @@
protected boolean merge(ReduceSinkOperator cRS, JoinOperator pJoin, int minReduc
     }
 
     /**
-     * Current RSDedup remove/replace child RS. So always copies
+     * Currently, RSDedup removes/replaces the child RS. For key columns,
+     * sorting order, and the number of reducers, copy
      * more specific part of configurations of child RS to that of parent RS.
+     * For partitioning columns, if both child RS and parent RS have been assigned
+     * partitioning columns, we will choose the more general partitioning columns.
+     * If parent RS has not been assigned any partitioning columns, we will use
+     * the partitioning columns of child RS, if any exist.
      */
     protected boolean merge(ReduceSinkOperator cRS, ReduceSinkOperator pRS, int minReducer)
         throws SemanticException {
@@ -239,20 +245,57 @@
protected boolean merge(ReduceSinkOperator cRS, ReduceSinkOperator pRS, int minR
       if (result == null) {
         return false;
       }
+
       if (result[0] > 0) {
-        ArrayList<ExprNodeDesc> childKCs = cRS.getConf().getKeyCols();
+        // The sorting columns of the child RS are more specific than
+        // those of the parent RS. Assign sorting columns of the child RS
+        // to the parent RS.
+        List<ExprNodeDesc> childKCs = cRS.getConf().getKeyCols();
         pRS.getConf().setKeyCols(ExprNodeDescUtils.backtrack(childKCs, cRS, pRS));
       }
-      if (result[1] > 0) {
-        ArrayList<ExprNodeDesc> childPCs = cRS.getConf().getPartitionCols();
-        pRS.getConf().setPartitionCols(ExprNodeDescUtils.backtrack(childPCs, cRS, pRS));
+
+      if (result[1] < 0) {
+        // The partitioning columns of the parent RS are more specific than
+        // those of the child RS.
+        List<ExprNodeDesc> childPCs = cRS.getConf().getPartitionCols();
+        if (childPCs != null && !childPCs.isEmpty()) {
+          // If partitioning columns of the child RS are assigned,
+          // assign these to the partitioning columns of the parent RS.
+          pRS.getConf().setPartitionCols(ExprNodeDescUtils.backtrack(childPCs, cRS, pRS));
+        }
+      } else if (result[1] > 0) {
+        // The partitioning columns of the child RS are more specific than
+        // those of the parent RS.
+        List<ExprNodeDesc> parentPCs = pRS.getConf().getPartitionCols();
+        if (parentPCs == null || parentPCs.isEmpty()) {
+          // If partitioning columns of the parent RS are not assigned,
+          // assign partitioning columns of the child RS to the parent RS.
+          ArrayList<ExprNodeDesc> childPCs = cRS.getConf().getPartitionCols();
+          pRS.getConf().setPartitionCols(ExprNodeDescUtils.backtrack(childPCs, cRS, pRS));
+        }
       }
+
       if (result[2] > 0) {
+        // The sorting order of the child RS is more specific than
+        // that of the parent RS. Assign the sorting order of the child RS
+        // to the parent RS.
+        if (result[0] <= 0) {
+          // Sorting columns of the child RS are not more specific than those of
+          // the parent RS, but the sorting order of the child RS is more specific
+          // than that of the parent RS.
+          throw new SemanticException("Sorting columns and order don't match. " +
+              "Try set " + HiveConf.ConfVars.HIVEOPTREDUCEDEDUPLICATION + "=false;");
+        }
         pRS.getConf().setOrder(cRS.getConf().getOrder());
       }
+
       if (result[3] > 0) {
+        // The number of reducers of the child RS is more specific than
+        // that of the parent RS. Assign the number of reducers of the child RS
+        // to the parent RS.
         pRS.getConf().setNumReducers(cRS.getConf().getNumReducers());
       }
+
       return true;
     }
 
